 /*
 
This program makes Appendix Table A6

Note that it produced the original trade regressions in the NBER WP


*/
/*

Written By: TFP
Modified on: 3/20/2019 by HR


This program uses the country dataset to estimate sourcing potentials
 

*/


 **Set directories
cd ""  /* PROJECT ROOT FOLDER */

/* DEFINE GLOBALS PATHS HERE */
global data "data"
global output output 
global input "data"


global disclosure "code"




use $data/country_imp_aff2007_ORIG.dta, clear  // this dataset is firms with some imports or MNE activity using the BEA region codes

*1. Prepare data for regressions
************************************************
replace mne_status="FOR" if mne_status=="FO2"
replace mne_status="DOM" if mne_status==""

*NOTE: we only consider a firm an MNE if it has a manuf affiliate.  I modify this below.
bys firmid : egen firm_num_man_aff_countries = total(man_aff)
replace mne_status="DOM" if firm_num_man_aff_countries==0 & mne_status=="MNE"

gen al_imp=imp_value-rp_imp_value



gen log_imports=log(imp_value)
gen log_exports=log(exp_value)
gen log_dist=log(distw)
gen log_tau=log(1+tariff_avg)
gen log_tauw=log(1+tariff_wavg)
gen log_gdp=log(gdp)
gen log_sales=log(sales)

gen log_imports_al=log(al_imp)
gen log_imports_rp=log(rp_imp_value)




gen low_income=1 if gdp_pc_qtile<=2
  replace low_income=0 if gdp_pc_qtile>2 & gdp_pc_qtile~=.
  


*Define indicator for aff in region (but not country)
**************

bys firmid region_det: egen num_aff_countries_r = total(man_aff)
bys firmid region_det: egen num_for_countries_r = total(for)  //  note that is can only be 1!
   tab num_for_countries_r

foreach var in imp exp {
  bys firmid region_det: egen num_`var'_countries_r = total(`var'orter)
  gen reg_`var'orter=1 if num_`var'_countries_r - `var'orter > 0
   replace reg_`var'orter=0 if num_`var'_countries_r - `var'orter <= 0
   replace reg_`var'orter=0 if `var'orter==1
  label variable reg_importer "`var'orter from region but not country"  
   }

gen reg_man_aff=1 if num_aff_countries_r - man_aff > 0
   replace reg_man_aff=0 if num_aff_countries_r - man_aff <= 0
   replace reg_man_aff=0 if man_aff==1   

 
label variable reg_man_aff "Affiliate in region but not country"   
   
*Define an indicator for region of the foreign-owned firm
gen reg_for=1 if num_for_countries_r==1 & for~=1
   replace reg_for=0 if reg_for==.
   tab for, miss
   tab reg_for, miss 
**************

*Distance interaction terms
 foreach var in man_aff for reg_man_aff reg_for {
	gen dist_`var'=log_dist*`var'
	}



*label variables for regression output 
label variable log_dist "log(distance_{c})"
label variable log_tauw "log(1+tariff_{c})"
label variable log_gdp "log(GDP_{c})"
label variable comlang_e "Common Language_{c}"
label variable contig "Contiguous_c"



label variable man_aff "Manuf Affiliate_{fc}"
label variable for "Foreign-owned_{fc}"
label variable reg_man_aff "Manuf Affil in Reg_{fc}"
label variable reg_for "Foreign-owned in Reg_{fc}"

label variable dist_man_aff "log(distance) \times Manuf Affiliate_{fc}"
label variable dist_for "log(distance) \times Foreign Owned_{fc}"
label variable dist_reg_man_aff "log(distance_c) \times Manuf Affil in Reg_{fc}"
label variable dist_reg_for "log(distance_c) \times Foreign-owned in Reg_{fc}"

gen all=1

save "$data/imp_int_regs.dta", replace
***************************************************************************

use "$data/imp_int_regs.dta", clear

*Create categorical vars for disclosure
foreach var in man_aff for reg_man_aff reg_for comlang_ethno contig {
  	    gen `var'_c="`var'=0" if `var'==0
	replace `var'_c="`var'=1" if `var'==1
	}
	
*2.  Run regressions (regressions in the original NBER WP)
***************************************************************************
save $data/temp_pre_reg.dta, replace

foreach var in imports  exports {
use $data/temp_pre_reg.dta, clear
estimates clear

*limit to countries with data	
reghdfe log_`var' comlang_ethno log_dist log_gdp contig, a(firmid) cluster(firmid iso)
 gen sampa_`var'=e(sample)

*most stringent reg to set sample
 reghdfe log_`var' man_aff for dist_man dist_for reg_man_aff reg_for dist_reg_man_aff dist_reg_for if sampa_`var'==1, a(firmid iso) cluster(firmid iso)
      do "$disclosure/count_regs.do"
      summ log_`var' if e(sample)
      estadd scalar `var'_mean=r(mean)
      estadd scalar `var'_sd=r(sd)
      estimates store gfce_6
      gen samp_`var'=e(sample)
    
  
   
*Run disclosure statistics
**************************************   
save $data/int_pre_disc_`var'.dta, replace




*Cross tab of import and export samples to verify no implicit samples

use firmid iso3 sales imp_value importer emp samp_imports mne_status using $data/int_pre_disc_imports.dta, clear
   merge 1:1 firmid iso3 using $data/int_pre_disc_exports.dta, keepusing(sales exp_value exporter emp samp_exports mne_status) update

 tab samp_imports samp_exports, miss


gen firms=1

gen import=imp_value
gen export=exp_value
save $data/temp.dta, replace
 
 
**Summary stats and disclosure for firm imports and exports
************************************************************
foreach flow in import export {
  use $data/temp.dta, clear
  
*drop if `flow'_status=="None"
bys firmid: egen tot_c=sum(`flow'er)

*drop firm-country observations that are outside the sample for firms that have 2+ countries
 drop if samp_`flow's==0 & tot_c>1
  
collapse (sum) `flow'_countries=`flow'er `flow' (mean) emp sales, by(firmid mne_status) 

tabstat `flow'_c, by(mne_status) stats(mean median sd)

tabstat `flow'_c if `flow'_c>1, by(mne_status) stats(mean median sd)

*Make fuzzy medians
 	gen median_status_`flow'=""
	gen `flow'ers=1 if `flow'_c>0
	gen `flow'ers_mc=1 if `flow'_c>1
	gen `flow'_status="None" if `flow'_c==0
	   replace `flow'_status="1 country" if `flow'_c==1
	   replace `flow'_status="2+ countries" if `flow'_c>1
	 gen med_`flow'_countries=. 
	 foreach st in DOM FOR MNE {  
		preserve   
			keep if `flow'_status=="2+ countries" & mne_status=="`st'" 
			_pctile `flow'_c, p(49 51)    
		restore
		replace med_`flow'_countries=`flow'_c if `flow'_c>=r(r1) & `flow'_c<=r(r2) & mne_status=="`st'"
		replace median_status_`flow'="in median" if med_`flow'_countries ~=.
		replace median_status_`flow'="not in median" if med_`flow'_countries ==.
		}
		
drop if `flow'_status=="None"	
	
	
	
*Run disclosure statistics
************************************** 
gen `flow'_stat="1c" if `flow'_status=="1 country"
	replace `flow'_stat="2+c" if `flow'_status=="2+ countries"

gen category_`flow'=""
levelsof mne_status, local(levels) 
      foreach i of local levels {
	replace category="mne_status=`i' & `flow'_stat=1c" if mne_status=="`i'" & `flow'_stat=="1c"
	replace category="mne_status=`i' & `flow'_stat=2+c & in median" if mne_status=="`i'" & `flow'_stat=="2+c" & median_status_`flow'=="in median"
	replace category="mne_status=`i' & `flow'_stat=2+c & not in median" if mne_status=="`i'" & `flow'_stat=="2+c"  & median_status_`flow'=="not in median"
         }

save "$data/disc_`flow'.dta", replace
  
gen total_sample="All firms"
gen sample_2c="2+c `flow'ers" if `flow'_stat=="2+c"
gen sample_1c="Implicit Sample" if `flow'_stat=="1c"


******************************************


collapse (sum)  `flow' `flow'ers  (mean) avg_`flow'_countries=`flow'_c med_`flow'_c , by(mne_status `flow'_status)


set obs 7
replace mne_status="Total" if _n==_N




foreach var in `flow' `flow'ers {
	capture drop tot
	egen tot_`var'=sum(`var')
	replace `var'=tot_`var' if mne_status=="Total"
	gen share_`var'=`var'/tot_`var'
	}


foreach var in avg_`flow'_countries med_`flow'_countries {
	replace `var'=. if `flow'_status=="1 country"
	}

*drop tot share 

*Round for disclosure
global round_var="`flow' avg_`flow'_countries med_`flow'_c"
do "$disclosure/rounding_pks.do"
global count_var="`flow'er"
do "$disclosure/count.do"


*drop if mne_status=="Total"

format share* %9.2fc

export excel mne_status `flow'_status share_`flow'ers share_`flow' avg_`flow'_countries med_`flow'_countries using "$output/TABLE_A6.xls" if ///
 `flow'_status=="2+ countries", sheetreplace sheet(`flow's) firstrow(var)
 


}
